For this Seurat analysis, we used the following parameters:

pars <- as.data.frame(do.call("rbind", params))
colnames(pars) <- "parameters_used"
pars

Starting with this Seurat object from Mireille.

path <- "/fast/work/groups/cubi/milekm/mireille/de/fracture-healing-and-aging-scseq/DE_analysis/miha"
sobj <- readRDS(file.path(path,params$path_to_object))
selected_cell_types <- sobj[[]] %>% 
  pull(cluster_id) %>% 
  unique()

sample <- sobj[[]] %>%
  select(group,mouse) %>%
  mutate(sample=paste0(group,"_",mouse)) %>%
  select(sample)

sobj <- AddMetaData(sobj, metadata = sample)

# cluster_metadata_3 <- sobj[[]] %>%
#     mutate(age = str_extract(group, "^[0-9]+w")) %>%
#     mutate(strain = str_extract(group, "w(E|SPF|)")) %>% 
#     mutate(time = str_extract(group, "[0-9]+days")) %>%
#     mutate(arm = paste(age, strain, time, sep = "_")) %>%
#     group_by(arm) %>%
#     mutate(pseudoID = sprintf("ID%02d", as.numeric(as.factor(mouse)))) %>% 
#   select(sample, cell_type, group, mouse, age, location, time, strain,pseudoID) %>% 
#   distinct()

expr <- list()

for (cell_type in selected_cell_types) {
 for (sample in unique(sobj[[]]$sample)) {
   # sample <- unique(sobj[[]]$sample)[1]
   # cell_type <- selected_cell_types[1]
   cells <- Cells(sobj)[(sobj@meta.data$sample==sample & sobj$cluster_id==cell_type )  ] 
   cells <- cells[!is.na(cells)]
   if (length(cells) > 1) {
     expr[[paste0(cell_type,';',sample)]] <- rowSums(sobj@assays$RNA@counts[,cells])
   }
 }
}

for (sample in unique(sobj[[]]$sample)) {
 cells <- Cells(sobj)[(sobj@meta.data$sample==sample )]
 cells <- cells[!is.na(cells)]
 expr[[paste0('all;',sample)]] <- rowSums(sobj@assays$RNA@counts[,cells])
}


sample_name <- sub(".*-","",sub(";.*","", sub(';','-',names(expr))))

colData<-data.frame(sample=names(expr),
               cell_type=factor(sub(';.*','',names(expr))),
               sample_name=sample_name,
               group=sub("_11.*","",sub(".*;","",names(expr))),
               mouse=sub('.*s_11','11',names(expr)) ,
               age=sub("w.*","w", sample_name),
               location=sub("_.*", "", sub(".*[wEF]","", sample_name )),
               time = sub(".*_", "", sub("days.*", "days", sample_name)),
               strain = ifelse(grepl("12w",sample_name), "young",
                               ifelse(grepl("SPF", sample_name), "aged",
                                      ifelse(grepl("wE", sample_name),"immuno", NA))))

colData <- colData %>%
  mutate(age_time_strain = paste0(age, "_", time, "_", strain)) %>%
  group_by(age_time_strain) %>%
  mutate(pseudoID = sprintf("ID%02d", as.numeric(as.factor(mouse))))

counts <- do.call(cbind, expr)

# write.table(counts, "all_counts_over_cell_types_expanded_unique.tsv", quote = F, sep ="\t", row.names=T)

Get number of reads per sample.

lengths <- colSums(counts)
df_lengths <- data.frame(sample = names(lengths),
                         n_reads = lengths) %>%
  mutate(group = sub("_11.*","",sub(".*;", "",sample)),
         celltype = sub(";.*", "", sample))  

ggplot(df_lengths, aes(celltype,n_reads,fill=group))+
  geom_boxplot(position=position_dodge(width=.5),width=.5, outlier.shape = 1, outlier.colour = "red")+
  geom_point(position=position_dodge(width=.5),colour='grey30', shape=1, size=2) +
  facet_wrap(~celltype, scales="free")

First, we run DESeq2 by including the pseudoID in the model. Here we assume that the age_time_location of the mouse is replicated in triplicate (pseudoID).

# selected_cell_types <- c("Monocytes / Macrophages", "Neutrophils", "Myeloid Precursor", "T Cells", "B Cells", "Dendritic Cells", "all")
selected_cell_types <- c("all", "Myeloid Precursor", "B Cells","Neutrophils", "Monocytes / Macrophages")

selected_contrasts <- c("12wfx_5days_vs_12wfx_2days", 
                        "26wEfx_5days_vs_26wEfx_2days",
                        "26wSPFfx_5days_vs_26wSPFfx_2days")
res <- list()

for (celltype in selected_cell_types){
  
  coldata_df <- colData %>%
    dplyr::filter(cell_type %in% celltype)
  counts_df <- subset(counts, select = coldata_df$sample)
  if (sum(colSums(counts_df)) != 0){
    dds <- DESeqDataSetFromMatrix(countData=as.matrix(counts_df),
                                  colData=coldata_df,
                                  design=~pseudoID + group)
    contrasts <- selected_contrasts
    sub_res <- list()
    
    for (comparison in contrasts){
      perturbation <- sub("_vs_.*", "", comparison)
      reference <- sub(".*_vs_", "", comparison)
      dds$group <- relevel(dds$group, ref = reference)
      
      if (celltype != "all" & celltype != "all_cells"){
        dds <- estimateSizeFactors(dds, type='poscounts')
        dds <- DESeq(dds, test = 'LRT', reduced = ~1, useT=TRUE, minmu=1e-6, minReplicatesForReplace=Inf)
      } else {
        dds <- estimateSizeFactors(dds)
        dds <- DESeq(dds)
      }
      
      if(params$shrinkage_type=="apeglm"){
        sub_res[[comparison]] <- lfcShrink(dds, coef=paste0("group_",comparison), type=params$shrinkage_type, format="DataFrame") %>%
          as.data.frame() %>%
          tibble::rownames_to_column('gene_name') %>%
          mutate(cell_type=celltype, contrast = comparison)
      } else {
        
        sub_res[[comparison]] <- lfcShrink(dds, contrast=c("group",perturbation, reference), type=params$shrinkage_type, format="DataFrame") %>%
          as.data.frame() %>%
          tibble::rownames_to_column('gene_name') %>%
          mutate(cell_type=celltype, contrast = comparison)
      }
    }
    
  }
  res[[celltype]] <- do.call("rbind", sub_res) %>%
    tibble::remove_rownames()
}

res_df <- do.call("rbind", res) %>%
  tibble::remove_rownames()
for (comparison in unique(res_df$contrast)){
  p <- res_df %>%                  
    dplyr::filter(!is.na(padj), contrast == comparison) %>%
    ggplot(aes(x=log2FoldChange,y=-log10(pvalue),color=padj < .05)) + 
    geom_point(size=.5, shape=1) +
    geom_label_repel(data=res_df %>%
                       dplyr::filter(contrast == comparison,padj < .05),
                     aes(label=gene_name),
                     segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=40,
                     label.padding=0.05,label.size=NA) +
    scale_color_manual(values=c('black','red')) +
    theme_classic() +
    theme(legend.position='none',
          strip.background=element_blank())+
    ggtitle(comparison)+
    facet_wrap(~cell_type, ncol = 2, scales = "free")
  print(p)
  
}

for (comparison in unique(res_df$contrast)){
  p <- res_df %>%
    dplyr::filter(contrast == comparison) %>%
    ggplot(aes(x=baseMean,y=log2FoldChange,color=padj < .05)) +
    geom_point(size=.5, shape=1) +
    geom_label_repel(data=res_df %>%
                       dplyr::filter(contrast == comparison, padj < .05),
                     aes(label=gene_name),
                     segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=30,
                     label.padding=0.05,label.size=NA) +
    scale_color_manual(values=c('black','red')) +
    scale_x_continuous(trans='log10') +
    coord_cartesian(ylim =c(-4,4)) +
    theme_classic() +
    theme(legend.position='none',
          strip.background=element_blank())+
    ggtitle(comparison)+
    facet_wrap(~cell_type, ncol = 2)
  print(p)
}

res_df %>%
  dplyr::arrange(padj) %>%
  dplyr::filter(!is.na(padj) & (padj < .05)) %>%
  dplyr::mutate_if(is.numeric, signif, 2) %>%
  DT::datatable(rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
# write.table(res_df,paste0("de_",condition_perturbation,"_vs_",condition_reference,".tsv"), quote=F, sep="\t",row.names = F)

Repeat this without pseudoID in the model. So only ~group.

# selected_cell_types <- c("Monocytes / Macrophages", "Neutrophils", "Myeloid Precursor", "T Cells", "B Cells", "Dendritic Cells", "all")
selected_cell_types <- c("all","Myeloid Precursor",  "B Cells","Neutrophils", "Monocytes / Macrophages")

selected_contrasts <- c("12wfx_5days_vs_12wfx_2days", 
                        "26wEfx_5days_vs_26wEfx_2days",
                        "26wSPFfx_5days_vs_26wSPFfx_2days")
res <- list()

for (celltype in selected_cell_types){
  
  coldata_df <- colData %>%
    dplyr::filter(cell_type %in% celltype)
  counts_df <- subset(counts, select = coldata_df$sample)
  if (sum(colSums(counts_df)) != 0){
    dds <- DESeqDataSetFromMatrix(countData=as.matrix(counts_df),
                                  colData=coldata_df,
                                  design=~group)
    contrasts <- selected_contrasts
    sub_res <- list()
    
    for (comparison in contrasts){
      perturbation <- sub("_vs_.*", "", comparison)
      reference <- sub(".*_vs_", "", comparison)
      dds$group <- relevel(dds$group, ref = reference)
      
      if (celltype != "all" & celltype != "all_cells"){
        dds <- estimateSizeFactors(dds, type='poscounts')
        dds <- DESeq(dds, test = 'LRT', reduced = ~1, useT=TRUE, minmu=1e-6, minReplicatesForReplace=Inf)
      } else {
        dds <- estimateSizeFactors(dds)
        dds <- DESeq(dds)
      }
      
      if(params$shrinkage_type=="apeglm"){
        sub_res[[comparison]] <- lfcShrink(dds, coef=paste0("group_",comparison), type=params$shrinkage_type, format="DataFrame") %>%
          as.data.frame() %>%
          tibble::rownames_to_column('gene_name') %>%
          mutate(cell_type=celltype, contrast = comparison)
      } else {
        
        sub_res[[comparison]] <- lfcShrink(dds, contrast=c("group",perturbation, reference), type=params$shrinkage_type, format="DataFrame") %>%
          as.data.frame() %>%
          tibble::rownames_to_column('gene_name') %>%
          mutate(cell_type=celltype, contrast = comparison)
      }
    }
    
  }
  res[[celltype]] <- do.call("rbind", sub_res) %>%
    tibble::remove_rownames()
}

res_df <- do.call("rbind", res) %>%
  tibble::remove_rownames()
for (comparison in unique(res_df$contrast)){
  p <- res_df %>%                  
    dplyr::filter(!is.na(padj), contrast == comparison) %>%
    ggplot(aes(x=log2FoldChange,y=-log10(pvalue),color=padj < .05)) + 
    geom_point(size=.5, shape=1) +
    geom_label_repel(data=res_df %>%
                       dplyr::filter(contrast == comparison,padj < .05),
                     aes(label=gene_name),
                     segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=40,
                     label.padding=0.05,label.size=NA) +
    scale_color_manual(values=c('black','red')) +
    theme_classic() +
    theme(legend.position='none',
          strip.background=element_blank())+
    ggtitle(comparison)+
    facet_wrap(~cell_type, ncol = 2, scales = "free")
  print(p)
}

for (comparison in unique(res_df$contrast)){
  
  p <- res_df %>%
    dplyr::filter(contrast == comparison) %>%
    ggplot(aes(x=baseMean,y=log2FoldChange,color=padj < .05)) +
    geom_point(size=.5, shape=1) +
    geom_label_repel(data=res_df %>%
                       dplyr::filter(contrast == comparison, padj < .05),
                     aes(label=gene_name),
                     segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=30,
                     label.padding=0.05,label.size=NA) +
    scale_color_manual(values=c('black','red')) +
    scale_x_continuous(trans='log10') +
    coord_cartesian(ylim =c(-4,4)) +
    theme_classic() +
    theme(legend.position='none',
          strip.background=element_blank())+
    ggtitle(comparison)+
    facet_wrap(~cell_type, ncol = 2)
  print(p)
}

res_df %>%
  dplyr::arrange(padj) %>%
  dplyr::filter(!is.na(padj) & (padj < .05)) %>%
  dplyr::mutate_if(is.numeric, signif, 2) %>%
  DT::datatable(rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
# write.table(res_df,paste0("de_",condition_perturbation,"_vs_",condition_reference,".tsv"), quote=F, sep="\t",row.names = F)

Here, we run DESeq2 by including the mouse ID in the model (not by pseudoID but by actual mouse ID). For this, we have to select the subset of the mice by age_time_strain, so that the model can be full rank.

# selected_cell_types <- c("Monocytes / Macrophages", "Neutrophils", "Myeloid Precursor", "T Cells", "B Cells", "Dendritic Cells", "all")
selected_cell_types <- c("all", "Myeloid Precursor", "B Cells","Neutrophils", "Monocytes / Macrophages")

age_time_strain <- unique(colData$age_time_strain)

res <- list()

for (which_mice in age_time_strain){
  for (celltype in selected_cell_types){
    coldata_df <- colData %>%
      dplyr::filter(cell_type %in% celltype, age_time_strain == which_mice)
    counts_df <- subset(counts, select = coldata_df$sample)
    which_groups <- unique(coldata_df$group)
    contrasts <- expand_grid(perturbation=which_groups, reference=which_groups) %>%
      filter(perturbation < reference) %>%
      mutate(contrast = paste0(perturbation,"_vs_",reference)) %>%
      pull(contrast)
    sub_res <- list()
    for (comparison in contrasts){
      perturbation <- sub("_vs_.*", "", comparison)
      reference <- sub(".*_vs_", "", comparison)
      
      if (sum(colSums(counts_df)) != 0){
        dds <- DESeqDataSetFromMatrix(countData=as.matrix(counts_df),
                                      colData=coldata_df,
                                      design=~mouse + group)
        dds$group <- relevel(dds$group, ref = reference)
        if (celltype != "all" & celltype != "all_cells"){
          dds <- estimateSizeFactors(dds, type='poscounts')
          dds <- DESeq(dds, test = 'LRT', reduced = ~1, useT=TRUE, minmu=1e-6, minReplicatesForReplace=Inf)
        } else {
          dds <- estimateSizeFactors(dds)
          dds <- DESeq(dds)
        }
      }
      
      if(params$shrinkage_type=="apeglm"){
        sub_res[[comparison]] <- lfcShrink(dds, coef=paste0("group_",comparison), type=params$shrinkage_type, format="DataFrame") %>%
          as.data.frame() %>%
          tibble::rownames_to_column('gene_name') %>%
          mutate(cell_type=celltype, contrast = comparison)
      } else {
        sub_res[[comparison]] <- lfcShrink(dds, contrast=c("group",perturbation, reference), type=params$shrinkage_type, format="DataFrame") %>%
          as.data.frame() %>%
          tibble::rownames_to_column('gene_name') %>%
          mutate(cell_type=celltype, contrast = comparison)
      }
    }
    res[[celltype]] <- do.call("rbind", sub_res) %>%
      tibble::remove_rownames()
  }
}
res_df <- do.call("rbind", res) %>%
  tibble::remove_rownames()
for (comparison in unique(res_df$contrast)){
  p <- res_df %>%
    dplyr::filter(!is.na(padj), contrast == comparison) %>%
    ggplot(aes(x=log2FoldChange,y=-log10(pvalue),color=padj < .05)) +
    geom_point(size=.5, shape=1) +
    geom_label_repel(data=res_df %>%
                       dplyr::filter(contrast == comparison,padj < .05),
                     aes(label=gene_name),
                     segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=40,
                     label.padding=0.05,label.size=NA) +
    scale_color_manual(values=c('black','red')) +
    theme_classic() +
    theme(legend.position='none',
          strip.background=element_blank())+
    ggtitle(comparison)+
    facet_wrap(~cell_type, ncol = 2, scales = "free")
  print(p)
}

for (comparison in unique(res_df$contrast)){
  p <- res_df %>%
    dplyr::filter(contrast == comparison) %>%
    ggplot(aes(x=baseMean,y=log2FoldChange,color=padj < .05)) +
    geom_point(size=.5, shape=1) +
    geom_label_repel(data=res_df %>%
                       dplyr::filter(contrast == comparison, padj < .05),
                     aes(label=gene_name),
                     segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=30,
                     label.padding=0.05,label.size=NA) +
    scale_color_manual(values=c('black','red')) +
    scale_x_continuous(trans='log10') +
    coord_cartesian(ylim =c(-4,4)) +
    theme_classic() +
    theme(legend.position='none',
          strip.background=element_blank())+
    ggtitle(comparison)+
    facet_wrap(~cell_type, ncol = 2)
  print(p)
}

res_df %>%
  dplyr::arrange(padj) %>%
  dplyr::filter(!is.na(padj) & (padj < .05)) %>%
  dplyr::mutate_if(is.numeric, signif, 2) %>%
  DT::datatable(rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
# # write.table(res_df,paste0("de_",condition_perturbation,"_vs_",condition_reference,".tsv"), quote=F, sep="\t",row.names = F)

Fourth approach is to run all contrasts separately and put only group into the model. We compare these results.

res <- list()

# selected_cell_types <- c("Monocytes / Macrophages", "Neutrophils", "Myeloid Precursor", "T Cells", "B Cells", "Dendritic Cells", "all")
selected_cell_types <- c("all", "Myeloid Precursor", "B Cells","Neutrophils", "Monocytes / Macrophages")
condition_perturbation <- "12wfx_5days"
condition_reference <- "12wfx_2days"


for (celltype in selected_cell_types){
  # celltype<-"B Cells"
  coldata_df <- colData %>%
    dplyr::filter(cell_type == celltype & (group ==condition_perturbation | group == condition_reference) )
  counts_df <- subset(counts, select = coldata_df$sample)
  if (sum(colSums(counts_df) != 0) & length(unique(coldata_df$group)) == 2){
    dds <- DESeqDataSetFromMatrix(countData=as.matrix(counts_df),
                                  colData=coldata_df,
                                  design=~group)
    dds$group <- relevel(dds$group, ref = condition_reference)

    if (celltype != "all"){
      dds <- estimateSizeFactors(dds, type='poscounts')
      dds <- DESeq(dds, test = 'LRT', reduced = ~1, useT=TRUE, minmu=1e-6, minReplicatesForReplace=Inf)
    } else {
      dds <- estimateSizeFactors(dds)
      dds <- DESeq(dds)
    }

    res[[celltype]] <- lfcShrink(dds, coef=2, type=params$shrinkage_type, format="DataFrame") %>%
      as.data.frame() %>%
      tibble::rownames_to_column('gene_name') %>%
      mutate(cell_type=celltype, contrast = resultsNames(dds)[2])
  }
}

res_df <- do.call("rbind", res) %>%
  tibble::remove_rownames()
res_df %>%
  dplyr::filter(!is.na(padj)) %>%
  ggplot(aes(x=log2FoldChange,y=-log10(pvalue),color=padj < .05)) +
  geom_point(size=.5, shape=1) +
  geom_label_repel(data=res_df %>%
                    dplyr::filter(padj < .05),
                  aes(label=gene_name),
                  segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=40,
                  label.padding=0.05,label.size=NA) +
  scale_color_manual(values=c('black','red')) +
  theme_classic() +
  theme(legend.position='none',
        strip.background=element_blank())+
  ggtitle(res_df$contrast)+
  facet_wrap(~cell_type, ncol = 2, scales = "free")

for (comparison in unique(res_df$contrast)){
  p <- res_df %>%
    dplyr::filter(contrast == comparison) %>%
    ggplot(aes(x=baseMean,y=log2FoldChange,color=padj < .05)) +
    geom_point(size=.5, shape=1) +
    geom_label_repel(data=res_df %>%
                       dplyr::filter(contrast == comparison, padj < .05),
                     aes(label=gene_name),
                     segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=30,
                     label.padding=0.05,label.size=NA) +
    scale_color_manual(values=c('black','red')) +
    scale_x_continuous(trans='log10') +
    coord_cartesian(ylim =c(-4,4)) +
    theme_classic() +
    theme(legend.position='none',
          strip.background=element_blank())+
    ggtitle(comparison)+
    facet_wrap(~cell_type, ncol = 2)
  print(p)
}

res_df %>%
  dplyr::arrange(padj) %>%
  dplyr::filter(!is.na(padj) & (padj < .05)) %>%
  dplyr::mutate_if(is.numeric, signif, 2) %>%
  DT::datatable(rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
# write.table(res_df,paste0("de_",condition_perturbation,"_vs_",condition_reference,".tsv"), quote=F, sep="\t",row.names = F)
res <- list()

# selected_cell_types <- c("Monocytes / Macrophages", "Neutrophils", "Myeloid Precursor", "T Cells", "B Cells", "Dendritic Cells", "all")
selected_cell_types <- c("all", "Myeloid Precursor", "B Cells","Neutrophils", "Monocytes / Macrophages")
condition_perturbation <- "26wEfx_5days"
condition_reference <- "26wEfx_2days"



for (celltype in selected_cell_types){
  # celltype<-"B Cells"
  coldata_df <- colData %>%
    dplyr::filter(cell_type == celltype & (group ==condition_perturbation | group == condition_reference) )
  counts_df <- subset(counts, select = coldata_df$sample)
  if (sum(colSums(counts_df) != 0) & length(unique(coldata_df$group)) == 2){
    dds <- DESeqDataSetFromMatrix(countData=as.matrix(counts_df),
                                  colData=coldata_df,
                                  design=~group)
    dds$group <- relevel(dds$group, ref = condition_reference)

    if (celltype != "all"){
      dds <- estimateSizeFactors(dds, type='poscounts')
      dds <- DESeq(dds, test = 'LRT', reduced = ~1, useT=TRUE, minmu=1e-6, minReplicatesForReplace=Inf)
    } else {
      dds <- estimateSizeFactors(dds)
      dds <- DESeq(dds)
    }

    res[[celltype]] <- lfcShrink(dds, coef=2, type=params$shrinkage_type, format="DataFrame") %>%
      as.data.frame() %>%
      tibble::rownames_to_column('gene_name') %>%
      mutate(cell_type=celltype, contrast = resultsNames(dds)[2])
  }
}

res_df <- do.call("rbind", res) %>%
  tibble::remove_rownames()
res_df %>%
  dplyr::filter(!is.na(padj)) %>%
  ggplot(aes(x=log2FoldChange,y=-log10(pvalue),color=padj < .05)) +
  geom_point(size=.5, shape=1) +
  geom_label_repel(data=res_df %>%
                    dplyr::filter(padj < .05),
                  aes(label=gene_name),
                  segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=40,
                  label.padding=0.05,label.size=NA) +
  scale_color_manual(values=c('black','red')) +
  theme_classic() +
  theme(legend.position='none',
        strip.background=element_blank())+
  ggtitle(res_df$contrast)+
  facet_wrap(~cell_type, ncol = 2, scales = "free")

for (comparison in unique(res_df$contrast)){
  p <- res_df %>%
    dplyr::filter(contrast == comparison) %>%
    ggplot(aes(x=baseMean,y=log2FoldChange,color=padj < .05)) +
    geom_point(size=.5, shape=1) +
    geom_label_repel(data=res_df %>%
                       dplyr::filter(contrast == comparison, padj < .05),
                     aes(label=gene_name),
                     segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=30,
                     label.padding=0.05,label.size=NA) +
    scale_color_manual(values=c('black','red')) +
    scale_x_continuous(trans='log10') +
    coord_cartesian(ylim =c(-4,4)) +
    theme_classic() +
    theme(legend.position='none',
          strip.background=element_blank())+
    ggtitle(comparison)+
    facet_wrap(~cell_type, ncol = 2)
  print(p)
}

res_df %>%
  dplyr::arrange(padj) %>%
  dplyr::filter(!is.na(padj) & (padj < .05)) %>%
  dplyr::mutate_if(is.numeric, signif, 2) %>%
  DT::datatable(rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
# write.table(res_df,paste0("de_",condition_perturbation,"_vs_",condition_reference,".tsv"), quote=F, sep="\t",row.names = F)
res <- list()

# selected_cell_types <- c("Monocytes / Macrophages", "Neutrophils", "Myeloid Precursor", "T Cells", "B Cells", "Dendritic Cells", "all")
selected_cell_types <- c("all","Myeloid Precursor",  "B Cells","Neutrophils", "Monocytes / Macrophages")
condition_perturbation <- "26wSPFfx_5days"
condition_reference <- "26wSPFfx_2days"

for (celltype in selected_cell_types){
  # celltype<-"B Cells"
  coldata_df <- colData %>%
    dplyr::filter(cell_type == celltype & (group ==condition_perturbation | group == condition_reference) )
  counts_df <- subset(counts, select = coldata_df$sample)
  if (sum(colSums(counts_df) != 0) & length(unique(coldata_df$group)) == 2){
    dds <- DESeqDataSetFromMatrix(countData=as.matrix(counts_df),
                                  colData=coldata_df,
                                  design=~group)
    dds$group <- relevel(dds$group, ref = condition_reference)

    if (celltype != "all"){
      dds <- estimateSizeFactors(dds, type='poscounts')
      dds <- DESeq(dds, test = 'LRT', reduced = ~1, useT=TRUE, minmu=1e-6, minReplicatesForReplace=Inf)
    } else {
      dds <- estimateSizeFactors(dds)
      dds <- DESeq(dds)
    }

    res[[celltype]] <- lfcShrink(dds, coef=2, type=params$shrinkage_type, format="DataFrame") %>%
      as.data.frame() %>%
      tibble::rownames_to_column('gene_name') %>%
      mutate(cell_type=celltype, contrast = resultsNames(dds)[2])
  }
}

res_df <- do.call("rbind", res) %>%
  tibble::remove_rownames()
res_df %>%
  dplyr::filter(!is.na(padj)) %>%
  ggplot(aes(x=log2FoldChange,y=-log10(pvalue),color=padj < .05)) +
  geom_point(size=.5, shape=1) +
  geom_label_repel(data=res_df %>%
                    dplyr::filter(padj < .05),
                  aes(label=gene_name),
                  segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=40,
                  label.padding=0.05,label.size=NA) +
  scale_color_manual(values=c('black','red')) +
  theme_classic() +
  theme(legend.position='none',
        strip.background=element_blank())+
  ggtitle(res_df$contrast)+
  facet_wrap(~cell_type, ncol = 2, scales = "free")

for (comparison in unique(res_df$contrast)){
  p <- res_df %>%
    dplyr::filter(contrast == comparison) %>%
    ggplot(aes(x=baseMean,y=log2FoldChange,color=padj < .05)) +
    geom_point(size=.5, shape=1) +
    geom_label_repel(data=res_df %>%
                       dplyr::filter(contrast == comparison, padj < .05),
                     aes(label=gene_name),
                     segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=30,
                     label.padding=0.05,label.size=NA) +
    scale_color_manual(values=c('black','red')) +
    scale_x_continuous(trans='log10') +
    coord_cartesian(ylim =c(-4,4)) +
    theme_classic() +
    theme(legend.position='none',
          strip.background=element_blank())+
    ggtitle(comparison)+
    facet_wrap(~cell_type, ncol = 2)
  print(p)
}

res_df %>%
  dplyr::arrange(padj) %>%
  dplyr::filter(!is.na(padj) & (padj < .05)) %>%
  dplyr::mutate_if(is.numeric, signif, 2) %>%
  DT::datatable(rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
# write.table(res_df,paste0("de_",condition_perturbation,"_vs_",condition_reference,".tsv"), quote=F, sep="\t",row.names = F)

Next, we show enrichment of transcriptional modules.

# library(tmod)
# 
# genes <- lapply(res, function(x) x %>% 
#                   filter(!is.na(padj)) %>%
#                   arrange(pvalue))
# lgenes <- lapply(genes, function(x) x$gene_name)
# 
# df2tmod <- function(df, gene_id_col=ncol(df), module_id_col=1, module_title_col=2) {
# 
#   require(tmod, quietly=TRUE)
# 
#   gene_ids <- df[[gene_id_col]]
#   module_ids <- df[[module_id_col]]
#   m2g <- tapply(gene_ids, module_ids, list)
#   df[[gene_id_col]] <- NULL
#   df <- df[!duplicated(df[[module_id_col]]), ]
#   colnames(df)[module_id_col] <- "ID"
#   colnames(df)[module_title_col] <- "Title"
# 
#   makeTmod(modules=df, modules2genes=m2g)
# }
# 
# df <- as.data.frame(msigdbr::msigdbr(species='Mus musculus'))
# df <- df[ , c("gs_name", "gs_id", "gs_cat", "gs_subcat", "gene_symbol") ]
# colnames(df) <- c("Title", "ID", "Category", "Subcategory", "GeneID")
# msig <- df2tmod(df[!is.na(df$GeneID),], gene_id_col=ncol(df), module_id_col=2, module_title_col=1)
# sel <- (msig$gs$Category %in% c("H"))  | (msig$gs$Subcategory %in% c('CP:REACTOME','GO:BP','CP:KEGG'))
# 
# res.tmod <- lapply(lgenes, tmodCERNOtest, mset = msig[sel])
# 
# res.tmod.filtered <- lapply(res.tmod, function(x) subset(x, adj.P.Val < 1e-10 & AUC > .7))
# 
# pie <- lapply(genes, function(x) tmodDecideTests(x$gene_name, lfc = x$log2FoldChange,
#                                                  pval = x$pvalue, lfc.thr=0.1, mset = msig[sel]))
# pie <- lapply(pie, as.data.frame)
# 
# tmodPanelPlot(res.tmod.filtered, text.cex = .8, pie = pie, pie.style="rug", grid="at")
# 
# 
# display_terms <- function(df){
#   if (nrow(res.tmod.filtered[[df]]) >0){
#   res.tmod.filtered[[df]]$cell_type <- df
#   res.tmod.filtered[[df]] %>%
#   dplyr::arrange(adj.P.Val) %>%
#   dplyr::mutate_if(is.numeric, signif, 2) %>%
#   DT::datatable(caption=df,rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
#   }
# }
# 
# 
# lapply(names(res.tmod.filtered), display_terms )
# 
# 
# for (cell_type in names(res.tmod.filtered)){
#   # cell_type <- "all_cells"
#   res.tmod.filtered[[cell_type]] %>%
#   dplyr::select(ID,Title,AUC,adj.P.Val) %>%
#   dplyr::mutate_if(is.numeric, signif,2) %>%
#   DT::datatable(caption=cell_type, rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
# }
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-conda-linux-gnu (64-bit)
## Running under: Rocky Linux 8.7 (Green Obsidian)
## 
## Matrix products: default
## BLAS/LAPACK: /data/cephfs-1/work/groups/cubi/users/milekm_c/miniconda/envs/R-fixed-biomart-deseq/lib/libopenblasp-r0.3.25.so;  LAPACK version 3.11.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=de_DE.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=de_DE.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=de_DE.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] stringr_1.5.1               ggrepel_0.9.4              
##  [3] DESeq2_1.40.2               SummarizedExperiment_1.30.2
##  [5] Biobase_2.60.0              MatrixGenerics_1.12.2      
##  [7] matrixStats_1.2.0           GenomicRanges_1.52.0       
##  [9] GenomeInfoDb_1.36.1         IRanges_2.34.1             
## [11] S4Vectors_0.38.1            BiocGenerics_0.46.0        
## [13] cowplot_1.1.1               ggplot2_3.4.4              
## [15] gtools_3.9.5                tidyr_1.3.0                
## [17] dplyr_1.1.4                 SeuratObject_5.0.1         
## [19] Seurat_4.4.0               
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3      jsonlite_1.8.8          magrittr_2.0.3         
##   [4] spatstat.utils_3.0-4    farver_2.1.1            rmarkdown_2.25         
##   [7] zlibbioc_1.46.0         vctrs_0.6.5             ROCR_1.0-11            
##  [10] spatstat.explore_3.2-5  RCurl_1.98-1.13         SQUAREM_2021.1         
##  [13] mixsqp_0.3-48           S4Arrays_1.0.4          htmltools_0.5.7        
##  [16] truncnorm_1.0-9         sass_0.4.8              sctransform_0.4.1      
##  [19] parallelly_1.36.0       KernSmooth_2.23-22      bslib_0.6.1            
##  [22] htmlwidgets_1.6.4       ica_1.0-3               plyr_1.8.9             
##  [25] plotly_4.10.3           zoo_1.8-12              cachem_1.0.8           
##  [28] igraph_1.5.1            mime_0.12               lifecycle_1.0.4        
##  [31] pkgconfig_2.0.3         Matrix_1.6-4            R6_2.5.1               
##  [34] fastmap_1.1.1           GenomeInfoDbData_1.2.11 fitdistrplus_1.1-11    
##  [37] future_1.33.0           shiny_1.8.0             digest_0.6.33          
##  [40] colorspace_2.1-0        patchwork_1.1.3         tensor_1.5             
##  [43] irlba_2.3.5.1           crosstalk_1.2.1         invgamma_1.1           
##  [46] labeling_0.4.3          progressr_0.14.0        fansi_1.0.6            
##  [49] spatstat.sparse_3.0-3   httr_1.4.7              polyclip_1.10-6        
##  [52] abind_1.4-5             compiler_4.3.2          withr_2.5.2            
##  [55] BiocParallel_1.34.2     highr_0.10              MASS_7.3-60            
##  [58] DelayedArray_0.26.6     tools_4.3.2             lmtest_0.9-40          
##  [61] httpuv_1.6.13           future.apply_1.11.0     goftest_1.2-3          
##  [64] glue_1.6.2              nlme_3.1-164            promises_1.2.1         
##  [67] grid_4.3.2              Rtsne_0.17              cluster_2.1.6          
##  [70] reshape2_1.4.4          generics_0.1.3          gtable_0.3.4           
##  [73] spatstat.data_3.0-3     data.table_1.14.10      XVector_0.40.0         
##  [76] sp_2.1-2                utf8_1.2.4              spatstat.geom_3.2-7    
##  [79] RcppAnnoy_0.0.21        RANN_2.6.1              pillar_1.9.0           
##  [82] spam_2.10-0             later_1.3.2             splines_4.3.2          
##  [85] lattice_0.22-5          survival_3.5-7          deldir_2.0-2           
##  [88] tidyselect_1.2.0        locfit_1.5-9.8          miniUI_0.1.1.1         
##  [91] pbapply_1.7-2           knitr_1.45              gridExtra_2.3          
##  [94] scattermore_1.2         xfun_0.41               DT_0.31                
##  [97] stringi_1.8.3           lazyeval_0.2.2          yaml_2.3.8             
## [100] evaluate_0.23           codetools_0.2-19        tibble_3.2.1           
## [103] cli_3.6.2               uwot_0.1.16             xtable_1.8-4           
## [106] reticulate_1.34.0       munsell_0.5.0           jquerylib_0.1.4        
## [109] Rcpp_1.0.11             globals_0.16.2          spatstat.random_3.2-2  
## [112] png_0.1-8               parallel_4.3.2          ellipsis_0.3.2         
## [115] dotCall64_1.1-1         bitops_1.0-7            listenv_0.9.0          
## [118] viridisLite_0.4.2       scales_1.3.0            ggridges_0.5.4         
## [121] crayon_1.5.2            leiden_0.4.3.1          purrr_1.0.2            
## [124] rlang_1.1.2             ashr_2.2-63